1  Project One

Author

Amahle Nzimande

QUESTION ONE

# Size of the built-in airquality data set: number of rows and of columns.
data_dims <- dim(airquality)
down <- data_dims[1]
across <- data_dims[2]

# Running count of rows that contain at least one NA.
# NOTE(review): this variable shadows the name of base::sum(); function calls
# such as sum(x) still resolve to the base function.
sum <- 0

print('The following rows have missing information: ')
[1] "The following rows have missing information: "
# Visit the rows in order; print the index of every row that has at least one
# missing value among the first `across` columns and add it to the count `sum`.
# seq_len() is used instead of 1:n so that a data set with zero rows or zero
# columns is skipped cleanly rather than iterating over c(1, 0).
for (i in seq_len(down)) {
  # anyNA() reports whether any cell of the row is NA, so each incomplete row
  # is counted and printed exactly once.
  if (anyNA(airquality[i, seq_len(across)])) {
    sum <- sum + 1
    print(i)
  }
}
[1] 5
[1] 6
[1] 10
[1] 11
[1] 25
[1] 26
[1] 27
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 39
[1] 42
[1] 43
[1] 45
[1] 46
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 65
[1] 72
[1] 75
[1] 83
[1] 84
[1] 96
[1] 97
[1] 98
[1] 102
[1] 103
[1] 107
[1] 115
[1] 119
[1] 150
# Report how many rows were flagged as incomplete by the loop above.
print(
  paste0(
    'In total there are ',
    sum,
    ' rows with missing information.'
  )
)
[1] "In total there are 42 rows with missing information."

QUESTION TWO

# Summary statistics (mean, standard deviation, minimum, maximum) for the
# temperature and ozone readings in the airquality data set.
# Columns are selected by name rather than by position so the table cannot
# silently describe the wrong variable if the column order changes, and NA
# values are dropped for both variables so a single missing reading cannot
# turn a statistic into NA.
temp <- airquality[['Temp']]
ozone <- airquality[['Ozone']]

my_table <- data.frame(
  Column = c('Temperature', 'Ozone'),
  Mean = c(mean(temp, na.rm = TRUE), mean(ozone, na.rm = TRUE)),
  SD = c(sd(temp, na.rm = TRUE), sd(ozone, na.rm = TRUE)),
  Min = c(min(temp, na.rm = TRUE), min(ozone, na.rm = TRUE)),
  Max = c(max(temp, na.rm = TRUE), max(ozone, na.rm = TRUE))
)

print(my_table)
       Column     Mean       SD Min Max
1 Temperature 77.88235  9.46527  56  97
2       Ozone 42.12931 32.98788   1 168

QUESTION THREE

# Load the built-in cars data set (columns used below: speed and dist).
data(cars)

# Design matrix: a column of ones for the intercept next to the speed values.
X <- cbind(1, cars[['speed']])
# Response vector: the dist column.
Y <- cars[['dist']]

#' Least-squares coefficients from the normal equations
#'
#' Computes the vector beta that satisfies (X'X) beta = X'y, where X is
#' `design` and y is `response`.
#'
#' @param design Numeric design matrix with one row per observation. Include
#'   a column of ones if an intercept is wanted.
#' @param response Numeric vector (or one-column matrix) with one entry per
#'   row of `design`.
#' @return A matrix of coefficient estimates with one row per column of
#'   `design`.
my_funct <- function(design, response) {
  if (NROW(design) != NROW(response)) {
    stop(
      "`design` and `response` must have the same number of rows",
      call. = FALSE
    )
  }

  # crossprod(a, b) computes t(a) %*% b without forming the transpose.
  xtx <- crossprod(design)
  xty <- crossprod(design, response)

  # Solve the linear system directly instead of inverting X'X and multiplying;
  # this avoids the extra rounding error of an explicit inverse.
  solve(xtx, xty)
}

# Show the coefficient estimates for the design matrix X and response Y
# (first row: intercept column, second row: speed column).
print(my_funct(design = X, response = Y))
           [,1]
[1,] -17.579095
[2,]   3.932409

QUESTION FOUR

# Fit the same regression with lm() so its estimates can be compared with the
# hand-computed coefficients.
# NOTE(review): the formula refers to cars$dist / cars$speed directly, so the
# `data` argument is effectively unused and the slope is labelled `cars$speed`;
# `dist ~ speed` would be the conventional spelling but would rename that
# coefficient and change the recorded call, so it is left as is here.
model <- lm(formula = cars$dist ~ cars$speed, data = cars)
summary(model)

Call:
lm(formula = cars$dist ~ cars$speed, data = cars)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.069  -9.525  -2.272   9.215  43.201 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -17.5791     6.7584  -2.601   0.0123 *  
cars$speed    3.9324     0.4155   9.464 1.49e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15.38 on 48 degrees of freedom
Multiple R-squared:  0.6511,    Adjusted R-squared:  0.6438 
F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12